from tableauscraper import TableauScraper as TS
url = "https://visualizedata.ucop.edu/t/Public/views/TransferbyCCM/Bymajorname"
ts = TS()
ts.loads(url)
workbook = ts.getWorkbook()
for t in workbook.worksheets:
# Show worksheet name
print(f"worksheet name : {t.name}")
worksheet name : Broad Disc Select Label worksheet name : Campus Broad worksheet name : Major Table worksheet name : Yield and Admit
# Find filter values
filters = {}
for t in workbook.worksheets:
# Show worksheet name
print(f"name : {t.name}")
# Show filters
print(t.getFilters())
# Save filter settings for later use
for f in t.getFilters():
print(f['column'])
filters[f['column']] = f
name : Broad Disc Select Label
[]
name : Campus Broad
[]
name : Major Table
[{'column': 'Campus', 'ordinal': 0, 'values': ['UCB', 'UCD', 'UCI', 'UCLA', 'UCM', 'UCR', 'UCSB', 'UCSC', 'UCSD'], 'globalFieldName': '[federated.1xtee380yrajk410jnsfd1fsy47h].[none:CMP_LOC_LOC1_SHRT_DESC:nk]', 'selection': ['UCB'], 'selectionAlt': [{'fn': '[federated.1xtee380yrajk410jnsfd1fsy47h].[none:CMP_LOC_LOC1_SHRT_DESC:nk]', 'columnFullNames': ['[Campus]'], 'domainTables': [{'isSelected': True, 'label': 'UCB'}]}]}, {'column': 'T Acad Yr', 'ordinal': 0, 'values': [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021], 'globalFieldName': '[federated.1xtee380yrajk410jnsfd1fsy47h].[none:T_ACAD_YR:ok]', 'selection': [2021], 'selectionAlt': [{'fn': '[federated.1xtee380yrajk410jnsfd1fsy47h].[none:T_ACAD_YR:ok]', 'columnFullNames': ['[T Acad Yr]'], 'domainTables': [{'label': '2012'}]}]}]
Campus
T Acad Yr
name : Yield and Admit
[]
# Get the values for the Campus filter
values = filters['Campus']['values']
values
['UCB', 'UCD', 'UCI', 'UCLA', 'UCM', 'UCR', 'UCSB', 'UCSC', 'UCSD']
dataYA={}
dataMT={}
dataCB={}
dataBDSL={}
# For each campus, create dataframes for each worksheet
for c in values:
ws = ts.getWorksheet("Major Table")
ucWB = ws.setFilter('Campus',c)
UC = ucWB.getWorksheet('Yield and Admit')
dataYA[c] = UC.data
UC = ucWB.getWorksheet('Broad Disc Select Label')
dataBDSL[c] = UC.data
UC = ucWB.getWorksheet('Major Table')
dataMT[c] = UC.data
dataMT[c]['University'] = c
UC = ucWB.getWorksheet('Campus Broad')
dataCB[c] = UC.data
print(c)
UCB UCD UCI UCLA UCM UCR UCSB UCSC UCSD
from collections import Counter
# Count the number of occurances of each major
dataMT.keys()
counter = Counter()
for i in dataMT.keys():
counter.update(dataMT[i]['Major name-value'].tolist())
values_rate = []
# Find each university's admit rate
for c in values:
values_rate.append((c, dataYA[c][dataYA[c]['Measure Names-alias'].isin(['Admit rate'])].iat[0,2]))
# Sort universities by admit rate
values_rate.sort(key=lambda x:x[1])
values_rate = [x[0] for x in values_rate]
values_rate
['UCLA', 'UCB', 'UCI', 'UCSB', 'UCD', 'UCSD', 'UCSC', 'UCM', 'UCR']
import pandas as pd
def create_majors_df(num_colleges):
"""
Create a cleaned up data set with majors that
exist in more than num_colleges universities.
"""
# Find majors that occur at more than requested colleges
majors = [t[0] for t in counter.items() if t[1]>num_colleges]
# Concatenate data frames from all universities
df = dataMT[values_rate[0]].copy()
for u in values_rate[1:]:
df = pd.concat([df, dataMT[u]], axis=0)
# Drop duplicated columns
df = df[df['Major name-value'].isin(majors)]
cols = [m for m in list(df.columns) if 'alias' in m]
df = df.drop(columns=cols)
# Clean up column names
df = df.rename(
columns=lambda x: x.replace('SUM(','')
.replace('AGG(','')
.replace(')','')
.replace('-value',''))
return df
# Create data frame with majors that exist in more than 7 UC's
tot_majors = create_majors_df(7)
# Pivot data frame to one column per major and admit rates per campus as cell values
pivot2 = tot_majors.pivot_table(index='University', columns='Major name', values='Admit rate')
pivot2 = pivot2.reindex(values_rate)
pivot2
| Major name | Anthropology | Chemistry | Computer science | Economics | History | Philosophy | Physics | Sociology |
|---|---|---|---|---|---|---|---|---|
| University | ||||||||
| UCLA | 0.366359 | 0.230769 | 0.023535 | NaN | NaN | 0.406593 | 0.367021 | NaN |
| UCB | 0.145695 | 0.308271 | 0.039669 | 0.221339 | 0.188623 | 0.510448 | 0.398907 | 0.177609 |
| UCI | 0.417808 | 0.408676 | 0.111836 | 0.325048 | 0.685629 | 0.713725 | 0.351724 | 0.428203 |
| UCSB | 0.506329 | 0.561247 | 0.096636 | 0.578329 | 0.515385 | 0.462633 | 0.630252 | 0.539167 |
| UCD | 0.711864 | 0.770270 | 0.284636 | 0.594234 | 0.728682 | 0.615385 | 0.538462 | 0.686567 |
| UCSD | NaN | 0.580882 | 0.158818 | 0.581800 | 0.727794 | 0.715415 | 0.555556 | 0.625150 |
| UCSC | 0.793846 | 0.454054 | 0.316677 | 0.741525 | 0.822526 | 0.812834 | 0.709302 | 0.667989 |
| UCM | 0.794521 | NaN | NaN | 0.739130 | 0.785714 | NaN | NaN | 0.731183 |
| UCR | 0.804651 | 0.592965 | 0.379851 | 0.810219 | 0.816406 | 0.847826 | 0.631148 | 0.817017 |
# Create a second data frame with more majors
tot_majors2 = create_majors_df(3)
# Drop unused columns
cols2 = [m for m in list(tot_majors.columns) if m not in ['Major name', 'Admit rate','Applicants']]
tot_majors2 = tot_majors2.drop(columns=cols2)
# Create new column with the partial sum of the number of applicants per major
tot_majors2['Total Applicants'] = tot_majors2.groupby(['Major name']).Applicants.transform('sum')
# Sort rows by new column
tot_majors2 = tot_majors2.sort_values(by=['Total Applicants'], ascending=False)
tot_majors2
| Major name | Applicants | Admit rate | Total Applicants | |
|---|---|---|---|---|
| 5 | Computer science | 1397 | 0.096636 | 13297 |
| 21 | Computer science | 1997 | 0.023535 | 13297 |
| 32 | Computer science | 1458 | 0.284636 | 13297 |
| 19 | Computer science | 1996 | 0.158818 | 13297 |
| 14 | Computer science | 1340 | 0.379851 | 13297 |
| ... | ... | ... | ... | ... |
| 76 | Geology | 20 | 0.3 | 182 |
| 71 | Geology | 30 | 0.7 | 182 |
| 67 | Geology | 22 | 0.318182 | 182 |
| 26 | Geology | 83 | 0.46988 | 182 |
| 47 | Geology | 27 | 0.444444 | 182 |
180 rows × 4 columns
# Create data frame with majors that exist in more than 7 UC's
tot_majors3 = create_majors_df(7)
# Create a new column for the lower GPA admit range limit
tot_majors3['Lower GPA'] = tot_majors['Admit GPA range'].str[0:4].astype(float)
# Pivot data frame to one column per major and lower GPA limit per campus as cell values
pivot = tot_majors3.pivot_table(index='University', columns='Major name', values='Lower GPA')
pivot = pivot.reindex(values_rate)
pivot
| Major name | Anthropology | Chemistry | Computer science | Economics | History | Philosophy | Physics | Sociology |
|---|---|---|---|---|---|---|---|---|
| University | ||||||||
| UCLA | 3.73 | 3.84 | 3.96 | NaN | NaN | 3.77 | 3.84 | NaN |
| UCB | 3.67 | 3.61 | 3.81 | 3.69 | 3.81 | 3.66 | 3.54 | 3.64 |
| UCI | 3.69 | 3.64 | 3.92 | 3.79 | 3.44 | 3.41 | 3.65 | 3.69 |
| UCSB | 3.53 | 3.59 | 3.97 | 3.52 | 3.55 | 3.54 | 3.54 | 3.52 |
| UCD | 3.32 | 3.26 | 3.79 | 3.56 | 3.39 | 3.48 | 3.54 | 3.48 |
| UCSD | NaN | 3.49 | 3.92 | 3.57 | 3.29 | 3.30 | 3.47 | 3.47 |
| UCSC | 3.05 | 3.16 | 3.43 | 3.15 | 3.05 | 3.07 | 3.29 | 3.06 |
| UCM | 3.02 | NaN | NaN | 3.16 | 2.99 | NaN | NaN | 3.08 |
| UCR | 2.98 | 3.02 | 3.51 | 3.07 | 3.00 | 3.05 | 3.03 | 3.02 |
import plotly.graph_objects as go
def drop_down_plot(df, title):
"""
Create an interactive visualization with
a drop down menu to select filter.
Inspired by: Shinichi Okada
https://towardsdatascience.com/how-to-create-an-interactive-dropdown-in-jupyter-322277f58a68
"""
fig = go.Figure()
# Add points to plot and trace
for column in df.columns.to_list():
fig.add_trace(
go.Scatter(
x = df.index,
y = df[column],
name = column
)
)
# Create the 'All' button that makes all columns visible
# by setting 'visible' to True for all columns
button_all = dict(label = 'All',
method = 'update',
args = [{'visible': df.columns.isin(df.columns),
'title': 'All',
'showlegend':True}])
def create_layout_button(column):
"""
Create a button for the provided column which will
make the data associated with the column visible.
The name property of the plot lines is used to
toggle visibility.
"""
return dict(label = column,
method = 'update',
args = [{'visible': df.columns.isin([column]),
'title': column,
'showlegend': True}])
# Create drop down menu and add the 'All'button
# and one button for each column in the data frame
fig.update_layout(
updatemenus=[go.layout.Updatemenu(
active = 0,
buttons = [button_all] + list(df.columns.map(lambda column: create_layout_button(column)))
)
]
)
# Update remaining layout properties
fig.update_layout(
title_text=title,
height=800
)
fig.show()
import seaborn as sns
sns.set(rc={'figure.figsize':(15,12)})
# Plot major admit rates with majors sorted by
# total number of applicants from low to high
sns.boxplot(data=tot_majors2, x='Admit rate', y='Major name').set(title='Admit Rates of Common Majors')
print('')
# Interactive plot of admit rate by major with
# UC's sorted by acceptance rate from low to high
drop_down_plot(pivot2, title="Admit Rates Per Campus of Most Common Majors")
# Interactive plot of lower GPA admit limit per major
# with UC's sorted by acceptance rate from low to high
drop_down_plot(pivot, title="Lower GPA Admit Limit of Most Common Majors by Campus")
from scipy import stats
# Test for significance of major with admit rates
group1 = pivot2.iloc[:, 0].dropna().values.tolist()
group2 = pivot2.iloc[:, 1].dropna().values.tolist()
group3 = pivot2.iloc[:, 2].dropna().values.tolist()
group4 = pivot2.iloc[:, 3].dropna().values.tolist()
group5 = pivot2.iloc[:, 4].dropna().values.tolist()
group6 = pivot2.iloc[:, 5].dropna().values.tolist()
group7 = pivot2.iloc[:, 6].dropna().values.tolist()
group8 = pivot2.iloc[:, 7].dropna().values.tolist()
# Conduct Krushal-Wallis test on 8 most common majors
stats.kruskal(group1,group2,group3,group4,group5,group6,group7,group8)
# Since p < 0.05 reject null hypothesis and conclude significant
# difference between majors in terms of acceptance rate
KruskalResult(statistic=21.64038461538462, pvalue=0.0029294091653327367)
from statsmodels.stats.multicomp import pairwise_tukeyhsd
tot_majors['Admit rate'] = tot_majors['Admit rate'].astype(float)
# Perform Tukey HSD test to test for pairwise significance
tukey = pairwise_tukeyhsd(endog=tot_majors['Admit rate'],
groups=tot_majors['Major name'],
alpha=0.05)
# Convert results to a pandas data frame
tukey_df = pd.DataFrame(data=tukey._results_table.data[1:], columns=tukey._results_table.data[0])
tukey_df.rename(columns = {'p-adj':'p_adj'}, inplace = True)
tukey_df
# Conclude statistically significant difference between the means of the admit
# rates for the major pair if p<0.05, indicated also by True in 'reject' column
| group1 | group2 | meandiff | p_adj | lower | upper | reject | |
|---|---|---|---|---|---|---|---|
| 0 | Anthropology | Chemistry | -0.0993 | 0.9582 | -0.3881 | 0.1896 | False |
| 1 | Anthropology | Computer science | -0.3912 | 0.0028 | -0.6884 | -0.0939 | True |
| 2 | Anthropology | Economics | 0.0063 | 1.0000 | -0.2909 | 0.3035 | False |
| 3 | Anthropology | History | 0.0912 | 0.9775 | -0.2060 | 0.3884 | False |
| 4 | Anthropology | Philosophy | 0.0680 | 0.9961 | -0.2293 | 0.3652 | False |
| 5 | Anthropology | Physics | -0.0448 | 0.9997 | -0.3421 | 0.2524 | False |
| 6 | Anthropology | Sociology | 0.0165 | 1.0000 | -0.2808 | 0.3137 | False |
| 7 | Chemistry | Computer science | -0.2919 | 0.0459 | -0.5808 | -0.0031 | True |
| 8 | Chemistry | Economics | 0.1056 | 0.9424 | -0.1833 | 0.3944 | False |
| 9 | Chemistry | History | 0.1905 | 0.4433 | -0.0984 | 0.4793 | False |
| 10 | Chemistry | Philosophy | 0.1672 | 0.6086 | -0.1216 | 0.4561 | False |
| 11 | Chemistry | Physics | 0.0544 | 0.9988 | -0.2344 | 0.3433 | False |
| 12 | Chemistry | Sociology | 0.1157 | 0.9093 | -0.1731 | 0.4046 | False |
| 13 | Computer science | Economics | 0.3975 | 0.0022 | 0.1003 | 0.6947 | True |
| 14 | Computer science | History | 0.4824 | 0.0001 | 0.1852 | 0.7796 | True |
| 15 | Computer science | Philosophy | 0.4592 | 0.0002 | 0.1619 | 0.7564 | True |
| 16 | Computer science | Physics | 0.3463 | 0.0119 | 0.0491 | 0.6436 | True |
| 17 | Computer science | Sociology | 0.4077 | 0.0016 | 0.1104 | 0.7049 | True |
| 18 | Economics | History | 0.0849 | 0.9850 | -0.2123 | 0.3821 | False |
| 19 | Economics | Philosophy | 0.0617 | 0.9979 | -0.2356 | 0.3589 | False |
| 20 | Economics | Physics | -0.0512 | 0.9994 | -0.3484 | 0.2461 | False |
| 21 | Economics | Sociology | 0.0102 | 1.0000 | -0.2871 | 0.3074 | False |
| 22 | History | Philosophy | -0.0232 | 1.0000 | -0.3205 | 0.2740 | False |
| 23 | History | Physics | -0.1360 | 0.8346 | -0.4333 | 0.1612 | False |
| 24 | History | Sociology | -0.0747 | 0.9930 | -0.3720 | 0.2225 | False |
| 25 | Philosophy | Physics | -0.1128 | 0.9304 | -0.4100 | 0.1844 | False |
| 26 | Philosophy | Sociology | -0.0515 | 0.9993 | -0.3487 | 0.2457 | False |
| 27 | Physics | Sociology | 0.0613 | 0.9979 | -0.2359 | 0.3585 | False |
# Create interactive plot of Tukey HSD test
# DATAFRAME HEADING FOR DOCS
fig = go.Figure(data=[go.Table(
header=dict(values=list(tukey_df.columns),
fill_color='cornflowerblue',
align='left'),
cells=dict(values=[tukey_df.group1,
tukey_df.group2,
tukey_df.meandiff,
tukey_df.p_adj,
tukey_df.lower,
tukey_df.upper,
tukey_df.reject],
fill_color='ivory',
align='left'))
])
fig.update_layout(
title_text = "Tukey Table for Comparisons of Admit Rate Per Major",
title_font_size=30,
font_family="Times New Roman",
font_color="black",
title_font_family="Times New Roman",
title_font_color="black",
)
fig.update_traces(cells_font=dict(size = 12))
fig.show()
tot_majors
| Broad discipline | Major name | College/School | Applicants | Enrolls | Admits | Enroll GPA range | Yield rate | Admit GPA range | Admit rate | University | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 21 | Computer & Information sciences | Computer science | School Of Engineering & applied sci | 1997 | 29 | 47 | 3.93 - 4.00 | 0.617021 | 3.96 - 4.00 | 0.023535 | UCLA |
| 60 | Letters | Philosophy | College Of Letters & science | 364 | 78 | 148 | 3.75 - 4.00 | 0.527027 | 3.77 - 4.00 | 0.406593 | UCLA |
| 73 | Physical Sciences | Chemistry | College Of Letters & science | 286 | 35 | 66 | 3.83 - 3.99 | 0.530303 | 3.84 - 4.00 | 0.230769 | UCLA |
| 77 | Physical Sciences | Physics | College Of Letters & science | 188 | 32 | 69 | 3.85 - 3.94 | 0.463768 | 3.84 - 3.96 | 0.367021 | UCLA |
| 81 | Social Sciences | Anthropology | College Of Letters & science | 434 | 110 | 159 | 3.70 - 3.94 | 0.691824 | 3.73 - 3.96 | 0.366359 | UCLA |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 46 | Physical Sciences | Chemistry | College Of Natural & agricultural sci | 199 | 17 | 118 | 2.72 - 3.13 | 0.144068 | 3.02 - 3.61 | 0.592965 | UCR |
| 48 | Physical Sciences | Physics | College Of Natural & agricultural sci | 122 | 11 | 77 | 2.97 - 3.47 | 0.142857 | 3.03 - 3.77 | 0.631148 | UCR |
| 52 | Social Sciences | Anthropology | College Of Humanities & social sci | 215 | 38 | 173 | 2.79 - 3.47 | 0.219653 | 2.98 - 3.63 | 0.804651 | UCR |
| 54 | Social Sciences | Economics | College Of Humanities & social sci | 548 | 72 | 444 | 2.85 - 3.38 | 0.162162 | 3.07 - 3.70 | 0.810219 | UCR |
| 59 | Social Sciences | Sociology | College Of Humanities & social sci | 1093 | 189 | 893 | 2.89 - 3.35 | 0.211646 | 3.02 - 3.66 | 0.817017 | UCR |
65 rows × 11 columns
# Convert majors into dummy variables
mlr = pd.get_dummies(data=tot_majors, columns=['Major name'])
mlr
# Set up data for multiple linear regression
x = mlr[['Major name_Anthropology', 'Major name_Chemistry',
'Major name_Computer science','Major name_Economics',
'Major name_History', 'Major name_Philosophy',
'Major name_Physics','Major name_Sociology']]
y = mlr['Admit rate']
| Broad discipline | College/School | Applicants | Enrolls | Admits | Enroll GPA range | Yield rate | Admit GPA range | Admit rate | University | Major name_Anthropology | Major name_Chemistry | Major name_Computer science | Major name_Economics | Major name_History | Major name_Philosophy | Major name_Physics | Major name_Sociology | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 21 | Computer & Information sciences | School Of Engineering & applied sci | 1997 | 29 | 47 | 3.93 - 4.00 | 0.617021 | 3.96 - 4.00 | 0.023535 | UCLA | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| 60 | Letters | College Of Letters & science | 364 | 78 | 148 | 3.75 - 4.00 | 0.527027 | 3.77 - 4.00 | 0.406593 | UCLA | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 73 | Physical Sciences | College Of Letters & science | 286 | 35 | 66 | 3.83 - 3.99 | 0.530303 | 3.84 - 4.00 | 0.230769 | UCLA | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 77 | Physical Sciences | College Of Letters & science | 188 | 32 | 69 | 3.85 - 3.94 | 0.463768 | 3.84 - 3.96 | 0.367021 | UCLA | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 81 | Social Sciences | College Of Letters & science | 434 | 110 | 159 | 3.70 - 3.94 | 0.691824 | 3.73 - 3.96 | 0.366359 | UCLA | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 46 | Physical Sciences | College Of Natural & agricultural sci | 199 | 17 | 118 | 2.72 - 3.13 | 0.144068 | 3.02 - 3.61 | 0.592965 | UCR | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 48 | Physical Sciences | College Of Natural & agricultural sci | 122 | 11 | 77 | 2.97 - 3.47 | 0.142857 | 3.03 - 3.77 | 0.631148 | UCR | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 52 | Social Sciences | College Of Humanities & social sci | 215 | 38 | 173 | 2.79 - 3.47 | 0.219653 | 2.98 - 3.63 | 0.804651 | UCR | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 54 | Social Sciences | College Of Humanities & social sci | 548 | 72 | 444 | 2.85 - 3.38 | 0.162162 | 3.07 - 3.70 | 0.810219 | UCR | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 59 | Social Sciences | College Of Humanities & social sci | 1093 | 189 | 893 | 2.89 - 3.35 | 0.211646 | 3.02 - 3.66 | 0.817017 | UCR | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
65 rows × 18 columns
from sklearn import linear_model
# Fit linear regression model using sklearn
regr = linear_model.LinearRegression()
regr.fit(x, y)
print('Intercept: \n', regr.intercept_)
print('Coefficients: \n', regr.coef_)
Intercept: 0.523472810777838 Coefficients: [ 0.04416149 -0.05509447 -0.34701547 0.05048021 0.13537195 0.1121347 -0.00067636 0.06063795]
import statsmodels.api as sm
# Add a constant
x = sm.add_constant(x)
# Fit OLS linear regression model using statsmodels
model = sm.OLS(y, x).fit()
predictions = model.predict(x)
print_model = model.summary()
print(print_model)
OLS Regression Results
==============================================================================
Dep. Variable: Admit rate R-squared: 0.390
Model: OLS Adj. R-squared: 0.316
Method: Least Squares F-statistic: 5.215
Date: Tue, 06 Dec 2022 Prob (F-statistic): 0.000123
Time: 15:01:10 Log-Likelihood: 20.346
No. Observations: 65 AIC: -24.69
Df Residuals: 57 BIC: -7.297
Df Model: 7
Covariance Type: nonrobust
===============================================================================================
coef std err t P>|t| [0.025 0.975]
-----------------------------------------------------------------------------------------------
const 0.4653 0.021 22.319 0.000 0.424 0.507
Major name_Anthropology 0.1023 0.062 1.637 0.107 -0.023 0.227
Major name_Chemistry 0.0031 0.059 0.052 0.959 -0.116 0.122
Major name_Computer science -0.2889 0.062 -4.622 0.000 -0.414 -0.164
Major name_Economics 0.1086 0.062 1.738 0.088 -0.016 0.234
Major name_History 0.1935 0.062 3.097 0.003 0.068 0.319
Major name_Philosophy 0.1703 0.062 2.725 0.009 0.045 0.295
Major name_Physics 0.0575 0.062 0.920 0.362 -0.068 0.183
Major name_Sociology 0.1188 0.062 1.901 0.062 -0.006 0.244
==============================================================================
Omnibus: 4.824 Durbin-Watson: 0.568
Prob(Omnibus): 0.090 Jarque-Bera (JB): 4.702
Skew: -0.653 Prob(JB): 0.0953
Kurtosis: 2.825 Cond. No. 1.35e+16
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 4.02e-31. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.